import spacy

# Load spaCy's English model (`en_core_web_sm`) once, at import time.
# `extract_nouns` reads this shared module-level `nlp` object on every call.
nlp = spacy.load("en_core_web_sm")


# Words dropped by default even when they are tagged as nouns.
# Entries are compared against the lower-cased token text.
_DEFAULT_EXCLUDED_WORDS = frozenset({"self", "this", "that"})

# Dependency labels that mark a token as modifying another word; tokens
# carrying one of these labels are not reported as nouns.
_MODIFIER_DEPS = frozenset({"amod", "poss", "compound"})


def extract_nouns(text, excluded_words=None):
    """Return the lemmas of the noun tokens found in *text*.

    The text is run through the module-level ``nlp`` pipeline and a token is
    kept only when all of the following hold:

    * its part-of-speech tag (``token.pos_``) is exactly ``"NOUN"``;
    * its lower-cased text is not one of the excluded words;
    * its dependency label is not ``amod``, ``poss`` or ``compound``;
    * it is not part of a named entity (``token.ent_type_`` is empty);
    * it is not a stop word;
    * it consists of alphabetic characters only.

    Args:
        text: The string to analyse.
        excluded_words: Optional iterable of words to drop in addition to the
            rules above. Matching is case-insensitive. When omitted (or
            ``None``), the default set ``{"self", "this", "that"}`` is used;
            when given, it replaces that default set entirely.

    Returns:
        A list of lemma strings, one per kept token, in document order.
        Duplicates are preserved.
    """
    if excluded_words is None:
        excluded = _DEFAULT_EXCLUDED_WORDS
    else:
        # Normalise once so the per-token membership test stays a plain
        # lower-case lookup.
        excluded = frozenset(word.lower() for word in excluded_words)

    doc = nlp(text)

    return [
        token.lemma_
        for token in doc
        if token.pos_ == "NOUN"                  # tagged as a noun
        and token.text.lower() not in excluded   # not an explicitly excluded word
        and token.dep_ not in _MODIFIER_DEPS     # not acting as a modifier
        and token.ent_type_ == ""                # not inside a named entity
        and not token.is_stop                    # not a stop word
        and token.is_alpha                       # alphabetic characters only
    ]


# Sample inputs: title-style strings, mostly upper case and with symbols.
# Two further samples are kept here, disabled, for quick manual experiments:
#   text1 = "These earbuds slap with 24hr battery life that's pure drip!"
#   text2 = "This is a self stirring glass that can mix"
text3 = "$1,000 MAGNETIC GOLD FACE MASK! REVIEW + DEMO feat. JEFFREE STAR"
text4 = "JEFFREE STAR Magic Star CONCEALER & SETTING POWDER.. The Truth! "

# Extract the nouns from both active samples before printing anything.
nouns3, nouns4 = extract_nouns(text3), extract_nouns(text4)

# Print one result list per line, in sample order.
print(nouns3, nouns4, sep="\n")
